install.packages("DeclareDesign")
install.packages("igraph")
devtools::install_github("gerasy1987/hiddenmeta", build_vignettes = TRUE)library(hiddenmeta)
library(DeclareDesign)
library(igraph)
library(knitr)## STUDY 1
study_1 <-
list(
pop =
list(
handler = get_study_population,
# network structure setup
network_handler = sim_block_network,
network_handler_args =
list(N = 1000, K = 2, prev_K = c(known = .3, hidden = .1), rho_K = 0,
p_edge_within = list(known = c(0.1, 0.3), hidden = c(0.1, 0.3)),
p_edge_between = list(known = 0.1, hidden = 0.1),
directed = FALSE),
# groups
group_names = c("known", "hidden"),
# probability of visibility (show-up) for each group
p_visible = list(known = 1, hidden = 1),
# probability of service utilization in hidden population
# for service multiplier
add_groups =
list(service_use = "rbinom(n(), 1, 0.25)",
"purrr::map_df(hidden, ~ sapply( `names<-`(rep(0, times = 10), paste0('loc_', 1:10)), function(add) rbinom(length(.x), 1, 0.1 + .x * add)))",
known_2 = 0.3, known_3 = 0.3)
),
sample =
list(
rds = list(handler = sample_rds,
# RDS parameters
sampling_variable = "rds",
hidden_var = "hidden", # default
n_seed = 20,
n_coupons = 3,
add_seeds = 3,
target_type = "sample",
target_n_rds = 60),
tls = list(handler = sample_tls,
sampling_variable = "tls",
# TLS sampling parameters
target_n_clusters = 4,
target_n_tls = 180,
cluster = paste0("loc_", 1:10)),
pps = list(handler = sample_pps,
sampling_variable = "pps",
# prop sampling parameters
sampling_frame = NULL,
strata = NULL,
cluster = NULL,
target_n_pps = 200)
),
inquiries = list(handler = get_study_estimands,
known_pattern = "^known$",
hidden_var = "hidden"),
estimators =
list(
rds =
list(sspse = list(handler = get_study_est_sspse,
prior_mean = 100,
mcmc_params = list(interval = 5, burnin = 2000, samplesize = 500),
total = 1000,
rds_prefix = "rds",
label = "rds_sspse"),
chords = list(handler = get_study_est_chords,
type = "mle",
seed_condition = "rds_from == -999",
n_boot = 100,
rds_prefix = "rds",
label = "rds_chords"),
multiplier = list(handler = get_study_est_multiplier,
service_var = "service_use",
seed_condition = "rds_from == -999",
n_boot = 100,
rds_prefix = "rds",
label = "rds_multi")),
tls =
list(ht = list(handler = get_study_est_ht,
weight_var = "tls_weight",
prefix = "tls",
label = "tls_ht"),
nsum = list(handler = get_study_est_nsum,
known = c("known", "known_2", "known_3"),
hidden = "hidden_visible_out",
survey_design = ~ tls_cluster,
n_boot = 100,
prefix = "tls",
label = "tls_nsum"),
recap = list(handler = get_study_est_recapture,
capture_parse =
"strsplit(x = unique(na.omit(tls_locs_sampled)), split = ';')[[1]]",
sample_condition = "tls == 1",
model = "Mt",
hidden_variable = "hidden",
label = "tls_recap")),
pps =
list(ht = list(handler = get_study_est_ht,
prefix = "pps",
label = "pps_ht"),
nsum = list(handler = get_study_est_nsum,
known = c("known", "known_2", "known_3"),
hidden = "hidden_visible_out",
survey_design = ~ pps_cluster + strata(pps_strata),
n_boot = 100,
prefix = "pps",
label = "pps_nsum")),
all =
list(recap1 = list(handler = get_study_est_recapture,
capture_vars = c("rds", "pps"),
model = "Mt",
hidden_variable = "hidden",
label = "rds_pps_recap"),
recap2 = list(handler = get_study_est_recapture,
capture_vars = c("rds"),
capture_parse =
"strsplit(x = unique(na.omit(tls_locs_sampled)), split = ';')[[1]]",
model = "Mt",
hidden_variable = "hidden",
label = "rds_tls_recap"))
)
)study_population <-
eval(as.call(c(list(declare_population), study_1$pop)))
set.seed(872312)
example_pop <- study_population()
example_pop %>%
dplyr::sample_n(n()) %>% # shuffle data frame
rmarkdown::paged_table(options = list(rows.print = 15))g <-
example_pop %$% {
hiddenmeta:::retrieve_graph(links) %>%
igraph::set_vertex_attr("name", value = name) %>%
igraph::set_vertex_attr("type", value = type)
}
igraph::V(g)$color <-
plyr::mapvalues(igraph::V(g)$type,
from = unique(igraph::V(g)$type),
to = grDevices::palette.colors(n = length(unique(igraph::V(g)$type)),
palette = "Set 3"))
plot(g,
# layout = igraph::layout_on_grid(g, dim = 2, width = 100),
layout = igraph::layout_on_grid(g, dim = 2, width = 150),
vertex.size = 1.5, vertex.dist = 4, vertex.label = NA, edge.width = .2,
edge.arrow.size = .2, edge.curved = .2)
legend(x = -1, y = -1.2,
legend = c("none", "known only", "hidden only", "both"),
pt.bg = grDevices::palette.colors(n = length(unique(igraph::V(g)$type)), palette = "Set 3"),
pch = 21, col = "#777777", pt.cex = 1, cex = 1, bty = "o", ncol = 2)The sampling procedures are additive in a sense that each procedure appends several columns relevant to the sampling procedure and particular draw based on population simulation, but does not change the study population data frame (unless you specify drop_nonsampled = TRUE).
study_sample_rds <-
eval(as.call(c(list(declare_sampling), study_1$sample$rds)))
set.seed(872312)
draw_data(study_population + study_sample_rds) %>%
dplyr::sample_n(n()) %>% # shuffle data frame
rmarkdown::paged_table(options = list(rows.print = 15))study_sample_pps <-
eval(as.call(c(list(declare_sampling), study_1$sample$pps)))
set.seed(872312)
draw_data(study_population + study_sample_rds + study_sample_pps) %>%
dplyr::sample_n(n()) %>% # shuffle data frame
rmarkdown::paged_table(options = list(rows.print = 15))study_sample_tls <-
eval(as.call(c(list(declare_sampling), study_1$sample$tls)))
set.seed(872312)
draw_data(study_population +
study_sample_rds + study_sample_pps + study_sample_tls) %>%
dplyr::sample_n(n()) %>% # shuffle data frame
rmarkdown::paged_table(options = list(rows.print = 15))study_estimands <-
eval(as.call(c(list(declare_inquiry), study_1$inquiries)))
set.seed(872312)
draw_estimands(study_population +
# study_sample_rds + study_sample_pps + study_sample_tls +
study_estimands) %>%
kable(digits = 2)| inquiry | estimand |
|---|---|
| known_size | 318.00 |
| hidden_size | 100.00 |
| known_prev | 0.32 |
| hidden_prev | 0.10 |
| hidden_size_in_known | 38.00 |
| hidden_prev_in_known | 0.12 |
est_sspse <-
eval(as.call(c(list(declare_estimator), study_1$estimators$rds$sspse)))
est_chords <-
eval(as.call(c(list(declare_estimator), study_1$estimators$rds$chords)))
est_multi <-
eval(as.call(c(list(declare_estimator), study_1$estimators$rds$multiplier)))
est_ht_pps <-
eval(as.call(c(list(declare_estimator), study_1$estimators$pps$ht)))
est_nsum_pps <-
eval(as.call(c(list(declare_estimator), study_1$estimators$pps$nsum)))
est_ht_tls <-
eval(as.call(c(list(declare_estimator), study_1$estimators$tls$ht)))
est_nsum_tls <-
eval(as.call(c(list(declare_estimator), study_1$estimators$tls$nsum)))
est_recap_tls <-
eval(as.call(c(list(declare_estimator), study_1$estimators$tls$recap)))
est_recap_rds_pps <-
eval(as.call(c(list(declare_estimator), study_1$estimators$all$recap1)))
est_recap_rds_tls <-
eval(as.call(c(list(declare_estimator), study_1$estimators$all$recap2)))
set.seed(872312)
draw_estimates(study_population +
study_sample_rds + study_sample_pps + study_sample_tls +
study_estimands +
est_sspse + est_chords + est_multi +
est_nsum_pps + est_ht_pps +
est_nsum_tls + est_ht_tls + est_recap_tls +
est_recap_rds_pps + est_recap_rds_tls) %>%
kable(digits = 2)| estimator | estimate | se | inquiry |
|---|---|---|---|
| hidden_size_rds_sspse | 104.00 | 26.30 | hidden_size |
| hidden_size_rds_chords | 37.00 | 24.05 | hidden_size |
| hidden_size_rds_multi | 104.00 | 29.33 | hidden_size |
| hidden_size_pps_nsum | 101.82 | 6.32 | hidden_size |
| hidden_size_pps_ht | 85.00 | 19.85 | hidden_size |
| hidden_prev_pps_ht | 0.09 | 0.02 | hidden_prev |
| hidden_size_tls_nsum | 106.11 | 2.37 | hidden_size |
| hidden_size_tls_ht | 4.03 | 0.85 | hidden_size |
| hidden_prev_tls_ht | 0.00 | 0.00 | hidden_prev |
| hidden_size_tls_recap | 51.07 | 21.46 | hidden_size |
| hidden_size_rds_pps_recap | 98.82 | 16.27 | hidden_size |
| hidden_size_rds_tls_recap | 98.83 | 9.77 | hidden_size |
study1 <-
study_population +
study_sample_rds + study_sample_pps + study_sample_tls +
study_estimands +
est_sspse + est_chords + est_multi +
est_nsum_pps + est_ht_pps +
est_nsum_tls + est_ht_tls + est_recap_tls +
est_recap_rds_pps + est_recap_rds_tls
# can draw one of the samples
study1$study_sample_rds(study1$study_population())
#> # A tibble: 1,000 × 84
#> name type known hidden links service_use loc_1 loc_2 loc_3 loc_4 loc_5
#> <int> <chr> <int> <int> <chr> <int> <int> <int> <int> <int> <int>
#> 1 1 00 0 0 11;136;41… 0 0 0 0 0 1
#> 2 2 00 0 0 88;181;24… 0 0 0 1 0 0
#> 3 3 00 0 0 41;82;260… 1 0 0 0 0 0
#> 4 4 00 0 0 24;212;61… 0 0 0 0 0 0
#> 5 5 00 0 0 43;69;72;… 0 1 0 0 0 0
#> 6 6 00 0 0 53;561;60… 1 0 0 0 1 0
#> 7 7 00 0 0 101;177;2… 0 0 0 0 0 0
#> 8 8 00 0 0 11;157;33… 0 0 0 0 0 1
#> 9 9 00 0 0 14;311;35… 0 0 0 0 0 0
#> 10 10 00 0 0 30;161;21… 0 0 0 0 0 1
#> # … with 990 more rows, and 73 more variables: loc_6 <int>, loc_7 <int>,
#> # loc_8 <int>, loc_9 <int>, loc_10 <int>, known_2 <int>, known_3 <int>,
#> # n_visible_out <dbl>, known_visible_out <dbl>, hidden_visible_out <dbl>,
#> # type_00_visible_out <dbl>, type_01_visible_out <dbl>,
#> # type_10_visible_out <dbl>, type_11_visible_out <dbl>,
#> # service_use_visible_out <dbl>, loc_1_visible_out <dbl>,
#> # loc_2_visible_out <dbl>, loc_3_visible_out <dbl>, …
# can draw full data
study1_data <- draw_data(study1)
# can draw estimands
study1$inquiry(study1_data)
#> inquiry estimand
#> 1 known_size 313.00000000
#> 2 hidden_size 95.00000000
#> 3 known_prev 0.31300000
#> 4 hidden_prev 0.09500000
#> 5 hidden_size_in_known 22.00000000
#> 6 hidden_prev_in_known 0.07028754
# can draw any of the estimators
study1$rds_sspse(study1_data)
#> estimator estimate se inquiry
#> 1 hidden_size_rds_sspse 150.5 92.57726 hidden_size
study1$pps_ht(study1_data)
#> estimator estimate se inquiry
#> 1 hidden_size_pps_ht 110.00 23.66712443 hidden_size
#> 2 hidden_prev_pps_ht 0.11 0.02366712 hidden_prev
study1$rds_pps_recap(study1_data)
#> estimator estimate se inquiry
#> 1 hidden_size_rds_pps_recap 92.53333 11.97572 hidden_sizerequireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 7)
set.seed(872312)
study1_simulations <-
plyr::llply(
as.list(1:100),
.fun = function(x) {
base::suppressWarnings(
DeclareDesign::simulate_design(study1, sims = 1) %>%
dplyr::mutate(
sim_ID = x
)
)
},
.parallel = TRUE
) %>%
dplyr::bind_rows()
saveRDS(study1_simulations, "vignettes/study1_sim.rds")study_diagnosands <-
declare_diagnosands(
mean_estimand = mean(estimand),
mean_estimate = mean(estimate),
sd_estimate = sd(estimate),
mean_se = mean(se),
bias = mean(estimate - estimand),
rmse = sqrt(mean((estimate - estimand) ^ 2))
)
study1_simulations <- readRDS(here::here("vignettes/study1_sim.rds"))
diagnose_design(simulations_df = study1_simulations,
diagnosands = study_diagnosands) %>%
reshape_diagnosis %>% select(-'Design') %>%
kable()| Inquiry | Estimator | N Sims | Mean Estimand | Mean Estimate | SD Estimate | Mean Se | Bias | RMSE |
|---|---|---|---|---|---|---|---|---|
| hidden_prev | hidden_prev_pps_ht | 100 | 0.10 | 0.11 | 0.03 | 0.02 | 0.01 | 0.03 |
| (0.00) | (0.00) | (0.00) | (0.00) | (0.00) | (0.00) | |||
| hidden_prev | hidden_prev_tls_ht | 100 | 0.10 | 0.00 | 0.00 | 0.00 | -0.09 | 0.09 |
| (0.00) | (0.00) | (0.00) | (0.00) | (0.00) | (0.00) | |||
| hidden_prev_in_known | NA | 100 | 0.09 | NA | NA | NA | NA | NA |
| (0.00) | NA | NA | NA | NA | NA | |||
| hidden_size | hidden_size_pps_ht | 100 | 95.17 | 106.50 | 28.15 | 21.77 | 11.33 | 27.10 |
| (0.57) | (2.75) | (2.32) | (0.27) | (2.45) | (2.18) | |||
| hidden_size | hidden_size_pps_nsum | 100 | 95.17 | 99.37 | 12.57 | 6.71 | 4.20 | 9.68 |
| (0.57) | (1.14) | (1.04) | (0.07) | (0.76) | (0.72) | |||
| hidden_size | hidden_size_rds_chords | 100 | 95.17 | 77.37 | 38.38 | 35.41 | -17.80 | 43.65 |
| (0.57) | (3.95) | (2.28) | (0.81) | (4.10) | (1.92) | |||
| hidden_size | hidden_size_rds_multi | 100 | 95.17 | 91.42 | 13.31 | 23.72 | -3.75 | 13.59 |
| (0.57) | (1.39) | (1.17) | (1.02) | (1.39) | (0.98) | |||
| hidden_size | hidden_size_rds_pps_recap | 100 | 95.17 | 101.07 | 19.64 | 15.32 | 5.90 | 17.13 |
| (0.57) | (1.90) | (1.40) | (0.60) | (1.62) | (1.28) | |||
| hidden_size | hidden_size_rds_sspse | 100 | 95.17 | 159.71 | 32.63 | 92.65 | 64.54 | 72.41 |
| (0.57) | (3.03) | (4.67) | (3.82) | (3.04) | (4.44) | |||
| hidden_size | hidden_size_rds_tls_recap | 100 | 95.17 | 98.38 | 11.34 | 10.23 | 3.21 | 11.04 |
| (0.57) | (1.23) | (0.78) | (0.29) | (1.12) | (0.83) | |||
| hidden_size | hidden_size_tls_ht | 100 | 95.17 | 3.43 | 1.15 | 0.90 | -91.74 | 91.91 |
| (0.57) | (0.11) | (0.07) | (0.02) | (0.53) | (0.54) | |||
| hidden_size | hidden_size_tls_nsum | 100 | 95.17 | 98.73 | 14.26 | 7.60 | 3.56 | 11.03 |
| (0.57) | (1.53) | (1.10) | (0.31) | (1.14) | (0.86) | |||
| hidden_size | hidden_size_tls_recap | 100 | 95.17 | 33.72 | 15.29 | 13.19 | -61.45 | 63.34 |
| (0.57) | (1.76) | (3.00) | (1.54) | (1.70) | (1.25) | |||
| hidden_size_in_known | NA | 100 | 27.35 | NA | NA | NA | NA | NA |
| (0.34) | NA | NA | NA | NA | NA | |||
| known_prev | NA | 100 | 0.30 | NA | NA | NA | NA | NA |
| (0.00) | NA | NA | NA | NA | NA | |||
| known_size | NA | 100 | 301.35 | NA | NA | NA | NA | NA |
| (0.97) | NA | NA | NA | NA | NA |
study_2 <-
list(
pop =
list(
handler = get_study_population,
network_handler = sim_block_network,
network_handler_args =
list(N = 5000, K = 3,
prev_K = c(frame = .5, known = .2, hidden = .1),
rho_K = c(.05, .05, .05),
p_edge_within = list(frame = c(0.05, 0.05),
known = c(0.1, 0.05),
hidden = c(0.2, 0.9)),
p_edge_between = list(frame = 0.05,
known = 0.1,
hidden = 0.01),
directed = FALSE),
group_names = c("frame", "known", "hidden"),
# probability of visibility (show-up) for each group
p_visible = list(frame = 1, known = 1, hidden = .6),
# probability of service utilization in hidden population
# for service multiplier
add_groups =
list(service_use = "rbinom(n(), 1, 0.25 + hidden * 0.3)",
"purrr::map_df(hidden, ~ sapply( `names<-`(runif(10, 0.05,.3), paste0('loc_', 1:10)), function(add) rbinom(length(.x), 1, 0.1 + .x * add)))",
known_2 = 0.1, known_3 = 0.2)
),
sample =
list(
rds = list(handler = sample_rds,
# RDS parameters
sampling_variable = "rds",
hidden_var = "hidden",
n_seed = 100,
n_coupons = 3,
target_type = "waves",
target_n_rds = 2),
tls = list(handler = sample_tls,
sampling_variable = "tls",
# TLS sampling parameters
target_n_clusters = 4,
target_n_tls = 300,
cluster = paste0("loc_", 1:8)),
pps = list(handler = sample_pps,
sampling_variable = "pps",
# prop sampling parameters
sampling_frame = "frame",
strata = NULL,
cluster = NULL,
target_n_pps = 800)
),
inquiries = list(handler = get_study_estimands,
known_pattern = "^known$|^frame$",
hidden_var = "hidden"),
estimators =
list(
rds =
list(sspse = list(handler = get_study_est_sspse,
label = "rds_sspse",
prior_mean = 450,
mcmc_params = list(interval = 5, burnin = 2000, samplesize = 500),
total = 5000,
rds_prefix = "rds"),
chords = list(handler = get_study_est_chords,
type = "mle",
label = "rds_chords",
seed_condition = "rds_from == -999",
n_boot = 100,
rds_prefix = "rds"),
multiplier = list(handler = get_study_est_multiplier,
service_var = "service_use",
seed_condition = "rds_from == -999",
n_boot = 100,
rds_prefix = "rds",
label = "rds_multi")),
tls =
list(ht = list(handler = get_study_est_ht,
weight_var = "tls_weight",
prefix = "tls",
label = "tls_ht"),
nsum = list(handler = get_study_est_nsum,
known = c("known", "frame"),
hidden = "hidden_visible_out",
survey_design = ~ tls_cluster,
n_boot = 100,
prefix = "tls",
label = "tls_nsum"),
recap = list(handler = get_study_est_recapture,
capture_vars = paste0("loc_", 1:8),
sample_condition = "tls == 1",
model = "Mt",
hidden_variable = "hidden",
label = "tls_recap")),
pps =
list(
ht = list(handler = get_study_est_ht,
label = "pps_ht"),
nsum = list(handler = get_study_est_nsum,
known = c("frame", "known"),
hidden = "hidden_visible_out",
survey_design = ~ pps_cluster + strata(pps_strata),
n_boot = 100,
label = "pps_nsum")),
all =
list(recap1 = list(handler = get_study_est_recapture,
capture_vars = c("rds", "pps"),
model = "Mt",
hidden_variable = "hidden",
label = "rds_pps_recap"),
recap2 = list(handler = get_study_est_recapture,
capture_vars = c("rds", paste0("loc_", 1:8)),
model = "Mt",
hidden_variable = "hidden",
label = "rds_tls_recap"))
)
)
study_3 <-
list(
pop =
list(
handler = get_study_population,
# network structure setup
network_handler = sim_block_network,
network_handler_args =
list(N = 2000, K = 2, prev_K = c(known = .3, hidden = .1), rho_K = .3,
p_edge_within = list(known = c(0.05, 0.1), hidden = c(0.05, 0.7)),
p_edge_between = list(known = 0.05, hidden = 0.05),
directed = FALSE),
# groups
group_names = c("known", "hidden"),
# probability of visibility (show-up) for each group
p_visible = list(known = 1, hidden = .7),
# probability of service utilization in hidden population
# for service multiplier
add_groups =
list(service_use = "rbinom(n(), 1, 0.25 + hidden * 0.05)",
"purrr::map_df(hidden, ~ sapply( `names<-`(c(.2, .1, .3, .2), paste0('loc_', 1:4)), function(add) rbinom(length(.x), 1, 0.1 + .x * add)))",
known_2 = 0.1, known_3 = 0.2)
),
sample =
list(
rds = list(handler = sample_rds,
# RDS parameters
sampling_variable = "rds",
hidden_var = "hidden", # default
n_seed = 20,
n_coupons = 3,
target_type = "sample",
target_n_rds = 100),
tls = list(handler = sample_tls,
sampling_variable = "tls",
# TLS sampling parameters
target_n_clusters = 3,
target_n_tls = 300,
cluster = paste0("loc_", 1:4)),
pps = list(handler = sample_pps,
sampling_variable = "pps",
# prop sampling parameters
sampling_frame = NULL,
strata = NULL,
cluster = NULL,
target_n_pps = 400)
),
inquiries = list(handler = get_study_estimands,
known_pattern = "^known$",
hidden_var = "hidden"),
estimators =
list(
rds =
list(sspse = list(handler = get_study_est_sspse,
prior_mean = 200,
mcmc_params = list(interval = 5, burnin = 2000, samplesize = 500),
total = 2000,
rds_prefix = "rds",
label = "rds_sspse"),
chords = list(handler = get_study_est_chords,
type = "mle",
seed_condition = "rds_from == -999",
n_boot = 100,
rds_prefix = "rds",
label = "rds_chords"),
multiplier = list(handler = get_study_est_multiplier,
service_var = "service_use",
seed_condition = "rds_from == -999",
n_boot = 100,
rds_prefix = "rds",
label = "rds_multi")),
tls =
list(ht = list(handler = get_study_est_ht,
weight_var = "tls_weight",
prefix = "tls",
label = "tls_ht"),
nsum = list(handler = get_study_est_nsum,
known = c("known", "known_2", "known_3"),
hidden = "hidden_visible_out",
survey_design = ~ tls_cluster,
n_boot = 100,
prefix = "tls",
label = "tls_nsum"),
recap = list(handler = get_study_est_recapture,
capture_vars = paste0("loc_", 1:4),
sample_condition = "tls == 1",
model = "Mt",
hidden_variable = "hidden",
label = "tls_recap")),
pps =
list(ht = list(handler = get_study_est_ht,
prefix = "pps",
label = "pps_ht"),
nsum = list(handler = get_study_est_nsum,
known = c("known", "known_2", "known_3"),
hidden = "hidden_visible_out",
survey_design = ~ pps_cluster + strata(pps_strata),
n_boot = 100,
prefix = "pps",
label = "pps_nsum")),
all =
list(recap1 = list(handler = get_study_est_recapture,
capture_vars = c("rds", "pps"),
model = "Mt",
hidden_variable = "hidden",
label = "rds_pps_recap"),
recap2 = list(handler = get_study_est_recapture,
capture_vars = c("rds", paste0("loc_", 1:4)),
model = "Mt",
hidden_variable = "hidden",
label = "rds_tls_recap"))
)
)multi_population <-
declare_population(handler = get_multi_populations,
pops_args = list(study_1 = study_1$pop,
study_2 = study_2$pop,
study_3 = study_3$pop))
multi_sampling <-
declare_sampling(handler = get_multi_samples,
samples_args = list(study_1 = study_1$sample,
study_2 = study_2$sample,
study_3 = study_3$sample))
multi_inquiry <-
declare_inquiry(handler = get_multi_estimands,
inquiries_args = list(study_1 = study_1$inquiries,
study_2 = study_2$inquiries,
study_3 = study_3$inquiries))
multi_estimators <-
declare_estimator(handler = get_multi_estimates,
estimators_args = list(study_1 = study_1$estimators,
study_2 = study_2$estimators,
study_3 = study_3$estimators))
multi_study <- multi_population + multi_sampling + multi_inquiry + multi_estimators
set.seed(872312)
draw_estimands(multi_study) %>%
kable()| inquiry | estimand |
|---|---|
| study_1_known_size | 318.0000000 |
| study_1_hidden_size | 100.0000000 |
| study_1_known_prev | 0.3180000 |
| study_1_hidden_prev | 0.1000000 |
| study_1_hidden_size_in_known | 38.0000000 |
| study_1_hidden_prev_in_known | 0.1194969 |
| study_2_frame_size | 2521.0000000 |
| study_2_known_size | 955.0000000 |
| study_2_hidden_size | 481.0000000 |
| study_2_frame_prev | 0.5042000 |
| study_2_known_prev | 0.1910000 |
| study_2_hidden_prev | 0.0962000 |
| study_2_hidden_size_in_frame | 269.0000000 |
| study_2_hidden_prev_in_frame | 0.1067037 |
| study_2_hidden_size_in_known | 114.0000000 |
| study_2_hidden_prev_in_known | 0.1193717 |
| study_3_known_size | 590.0000000 |
| study_3_hidden_size | 211.0000000 |
| study_3_known_prev | 0.2950000 |
| study_3_hidden_prev | 0.1055000 |
| study_3_hidden_size_in_known | 145.0000000 |
| study_3_hidden_prev_in_known | 0.2457627 |
draw_estimates(multi_study) %>%
kable()| estimator | estimate | se | inquiry |
|---|---|---|---|
| hidden_size_rds_sspse | 160.5000000 | 97.5311122 | study_1_hidden_size |
| hidden_size_rds_chords | 78.0000000 | 28.2386994 | study_1_hidden_size |
| hidden_size_rds_multi | 130.9090909 | 64.5431295 | study_1_hidden_size |
| hidden_size_tls_ht | 4.2110092 | 0.9546678 | study_1_hidden_size |
| hidden_prev_tls_ht | 0.0042110 | 0.0009547 | study_1_hidden_prev |
| hidden_size_tls_nsum | 106.4848460 | 3.8868799 | study_1_hidden_size |
| hidden_size_tls_recap | 65.8977969 | 28.2799918 | study_1_hidden_size |
| hidden_size_pps_ht | 80.0000000 | 19.0809789 | study_1_hidden_size |
| hidden_prev_pps_ht | 0.0800000 | 0.0190810 | study_1_hidden_prev |
| hidden_size_pps_nsum | 116.6671698 | 7.3704423 | study_1_hidden_size |
| hidden_size_rds_pps_recap | 78.7692308 | 9.1734484 | study_1_hidden_size |
| hidden_size_rds_tls_recap | 133.0891175 | 15.8443204 | study_1_hidden_size |
| hidden_size_rds_sspse | 445.5000000 | 513.5584734 | study_2_hidden_size |
| hidden_size_rds_chords | 362.0000000 | 118.3849243 | study_2_hidden_size |
| hidden_size_rds_multi | 555.2210526 | 35.5111343 | study_2_hidden_size |
| hidden_size_tls_ht | 49.3191882 | 22.7856632 | study_2_hidden_size |
| hidden_prev_tls_ht | 0.0098638 | 0.0045571 | study_2_hidden_prev |
| hidden_size_tls_nsum | 739.9761541 | 79.0004286 | study_2_hidden_size |
| hidden_size_tls_recap | 65.7249135 | 1.8443869 | study_2_hidden_size |
| hidden_size_pps_ht | 257.4800000 | 27.0490944 | study_2_hidden_size |
| hidden_prev_pps_ht | 0.0514960 | 0.0054098 | study_2_hidden_prev |
| hidden_size_pps_nsum | 207.6162547 | 26.6807344 | study_2_hidden_size |
| hidden_size_rds_pps_recap | 398.2972973 | 42.9184468 | study_2_hidden_size |
| hidden_size_rds_tls_recap | 551.2352545 | 6.3146870 | study_2_hidden_size |
| hidden_size_rds_sspse | 202.0000000 | 57.8507824 | study_3_hidden_size |
| hidden_size_rds_chords | 124.0000000 | 46.5259556 | study_3_hidden_size |
| hidden_size_rds_multi | 153.8461538 | 18.7141888 | study_3_hidden_size |
| hidden_size_tls_ht | 32.4611111 | 4.5375350 | study_3_hidden_size |
| hidden_prev_tls_ht | 0.0162306 | 0.0022688 | study_3_hidden_prev |
| hidden_size_tls_nsum | 440.2018810 | 28.4622269 | study_3_hidden_size |
| hidden_size_tls_recap | 66.9238085 | 4.1726843 | study_3_hidden_size |
| hidden_size_pps_ht | 195.0000000 | 28.5714286 | study_3_hidden_size |
| hidden_prev_pps_ht | 0.0975000 | 0.0142857 | study_3_hidden_prev |
| hidden_size_pps_nsum | 290.1977901 | 19.8818273 | study_3_hidden_size |
| hidden_size_rds_pps_recap | 201.0000000 | 27.9131306 | study_3_hidden_size |
| hidden_size_rds_tls_recap | 179.1636496 | 5.7694275 | study_3_hidden_size |
# multi_simulations <- simulate_design(multi_study, sims = 2)
requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 20)
set.seed(872312)
multi_simulations <-
plyr::llply(
as.list(1:100),
.fun = function(x) {
base::suppressWarnings(
DeclareDesign::simulate_design(multi_study, sims = 1) %>%
dplyr::mutate(
sim_ID = x
)
)
},
.parallel = TRUE
) %>%
dplyr::bind_rows()
saveRDS(multi_simulations, file = "vignettes/multi_sim.rds")multi_simulations <- readRDS(here::here("vignettes/multi_sim.rds"))
diagnose_design(simulations_df = multi_simulations,
diagnosands = study_diagnosands) %>%
reshape_diagnosis(digits = 2) %>%
dplyr::select(-'Design') %>%
dplyr::filter(`Mean Estimate` != "NA") %>%
DT::datatable(options = list(scrollX = TRUE, pageLength = 15))Conduct multi-study design for as many sampling-estimator pairs in each study as possible, then diagnoise the multi-study design. Calculate average (across simulations) estimand and bias of sampling-estimator for each of the studies and estimator sampling strategies. These will serve as population we will be drawing population-sampling-estimator triads
Estimands include:
As mentioned before sampling will consist of drawing population-sampling-estimator triads from the population presuming that each study uses at least two sampling strategies at a time
Once we draw sample we use Stan model to estimate study-specific estimands and sampling-estimator specific errors (biases)
We have all parts to conduct the full cycle of meta-analyses
meta_population <-
declare_population(multi_design = multi_study, n_sim = 3, parallel = FALSE,
handler = get_meta_population)
meta_inquiry <-
declare_inquiry(study_estimand = "hidden_size",
samp_est_benchmark = "pps_ht",
handler = get_meta_estimands)
meta_sample <-
declare_sampling(sampling_variable = "meta",
selection_variables = c("sample", "estimator"),
samples_per_study = 2, estimator_per_sample = 3,
force = list(sample = "pps", estimator = "ht"),
handler = get_meta_sample)
meta_estimator <-
declare_estimator(sampling_variable = "meta",
which_estimand = "hidden_size",
benchmark = list(sample = "pps", estimator = "ht"),
parallel = FALSE,
stan_handler = get_meta_stan,
handler = get_meta_estimates)
meta_study <- meta_population + meta_inquiry + meta_sample + meta_estimator
set.seed(872312)
draw_estimands(meta_study) %>%
kable()| inquiry | estimand |
|---|---|
| bias_pps_ht | -75.5751389 |
| bias_pps_nsum | -71.4793073 |
| bias_rds_chords | -109.5555556 |
| bias_rds_multi | 6.0433312 |
| bias_rds_pps_recap | -8.1517712 |
| bias_rds_sspse | 457.1111111 |
| bias_rds_tls_recap | 0.9378198 |
| bias_sd_pps_ht | 123.4264977 |
| bias_sd_pps_nsum | 214.8833125 |
| bias_sd_rds_chords | 165.3839397 |
| bias_sd_rds_multi | 25.8688849 |
| bias_sd_rds_pps_recap | 34.0001860 |
| bias_sd_rds_sspse | 710.6276847 |
| bias_sd_rds_tls_recap | 2.6262751 |
| bias_sd_tls_ht | 183.3685072 |
| bias_sd_tls_nsum | 124.4426607 |
| bias_sd_tls_recap | 195.7716216 |
| bias_tls_ht | -233.0425404 |
| bias_tls_nsum | 124.6250478 |
| bias_tls_recap | -203.3617179 |
| rel_bias_pps_ht | 1.0000000 |
| rel_bias_pps_nsum | 0.9368880 |
| rel_bias_rds_chords | 1.4782626 |
| rel_bias_rds_multi | -0.0568482 |
| rel_bias_rds_pps_recap | 0.1282246 |
| rel_bias_rds_sspse | -5.9293901 |
| rel_bias_rds_tls_recap | -0.0121496 |
| rel_bias_sd_pps_ht | 1.0000000 |
| rel_bias_sd_pps_nsum | 1.7537049 |
| rel_bias_sd_rds_chords | 1.3302692 |
| rel_bias_sd_rds_multi | 0.2008103 |
| rel_bias_sd_rds_pps_recap | 0.2707674 |
| rel_bias_sd_rds_sspse | 5.9693857 |
| rel_bias_sd_rds_tls_recap | 0.0207805 |
| rel_bias_sd_tls_ht | 1.5034308 |
| rel_bias_sd_tls_nsum | 1.0205011 |
| rel_bias_sd_tls_recap | 1.6034942 |
| rel_bias_tls_ht | 3.1280115 |
| rel_bias_tls_nsum | -1.6760795 |
| rel_bias_tls_recap | 2.7298581 |
| study_1_hidden_size | 94.6666667 |
| study_2_hidden_size | 490.3333333 |
| study_3_hidden_size | 209.3333333 |
draw_estimates(meta_study) %>%
kable()
#> Running /usr/lib/R/bin/R CMD SHLIB foo.c
#> gcc -std=gnu99 -I"/usr/share/R/include" -DNDEBUG -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/Rcpp/include/" -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/" -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/unsupported" -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/BH/include" -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/StanHeaders/include/src/" -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/StanHeaders/include/" -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppParallel/include/" -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DBOOST_NO_AUTO_PTR -include '/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -fpic -g -O2 -ffile-prefix-map=/build/r-base-kbcSEe/r-base-4.1.1=. -flto=auto -ffat-lto-objects -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g -c foo.c -o foo.o
#> In file included from /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/Eigen/Core:88,
#> from /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/Eigen/Dense:1,
#> from /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13,
#> from <command-line>:
#> /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name ‘namespace’
#> 628 | namespace Eigen {
#> | ^~~~~~~~~
#> /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:17: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘{’ token
#> 628 | namespace Eigen {
#> | ^
#> In file included from /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/Eigen/Dense:1,
#> from /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13,
#> from <command-line>:
#> /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/Eigen/Core:96:10: fatal error: complex: No such file or directory
#> 96 | #include <complex>
#> | ^~~~~~~~~
#> compilation terminated.
#> make: *** [/usr/lib/R/etc/Makeconf:168: foo.o] Error 1| estimator | estimate | se | inquiry |
|---|---|---|---|
| rel_bias_pps_ht_meta | 1.0000000 | 0.0000000 | rel_bias_pps_ht |
| rel_bias_rds_sspse_meta | 9.0255787 | 0.9682940 | rel_bias_rds_sspse |
| rel_bias_rds_multi_meta | 2.0092177 | 0.5122053 | rel_bias_rds_multi |
| rel_bias_pps_nsum_meta | 1.2651668 | 0.1307401 | rel_bias_pps_nsum |
| rel_bias_rds_pps_recap_meta | 2.0172260 | 0.2839682 | rel_bias_rds_pps_recap |
| rel_bias_rds_chords_meta | 0.7451670 | 0.4735371 | rel_bias_rds_chords |
| rel_bias_tls_ht_meta | 0.2008796 | 0.0290836 | rel_bias_tls_ht |
| rel_bias_tls_recap_meta | 0.3716098 | 0.0425775 | rel_bias_tls_recap |
| study_1_meta | 68.6466563 | 15.1119178 | study_1_hidden_size |
| study_2_meta | 363.3712115 | 73.2594432 | study_2_hidden_size |
| study_3_meta | 308.6784268 | 63.8887280 | study_3_hidden_size |
requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 20)
set.seed(872312)
meta_simulations <-
plyr::llply(
as.list(1:100),
.fun = function(x) {
base::suppressWarnings(
DeclareDesign::simulate_design(meta_study, sims = 1) %>%
dplyr::mutate(
sim_ID = x
)
)
},
.parallel = TRUE
) %>%
dplyr::bind_rows()
saveRDS(meta_simulations, file = "vignettes/meta_sim.rds")meta_simulations <- readRDS(here::here("vignettes/meta_sim.rds"))
diagnose_design(simulations_df = meta_simulations,
diagnosands = study_diagnosands) %>%
reshape_diagnosis(digits = 2) %>%
dplyr::select(-'Design') %>%
dplyr::filter(`Mean Estimate` != "NA") %>%
DT::datatable(options = list(scrollX = TRUE, pageLength = 15))